The libraries I will use:
library(data.table)
library(dplyr)
library(jpeg)
library(imager)
library(factoextra)
I read the data and apply PCA:
raw_data <- read.csv("/Users/ayberkakgun/Desktop/Musk1.csv") %>% as.data.frame()
features<-scale(raw_data[3:168])
labels<-raw_data[1:2]
colnames(labels)<-c("BagClass","BagId")
pca<-princomp(features)
str(pca)
## List of 7
## $ sdev : Named num [1:166] 7.19 4.8 3.55 2.9 2.86 ...
## ..- attr(*, "names")= chr [1:166] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ loadings: loadings [1:166, 1:166] -0.036844 -0.06835 -0.094764 0.092619 -0.000908 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:166] "X42" "X.198" "X.109" "X.75" ...
## .. ..$ : chr [1:166] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ center : Named num [1:166] 1.60e-16 -6.24e-17 3.54e-17 1.66e-17 -6.13e-17 ...
## ..- attr(*, "names")= chr [1:166] "X42" "X.198" "X.109" "X.75" ...
## $ scale : Named num [1:166] 1 1 1 1 1 1 1 1 1 1 ...
## ..- attr(*, "names")= chr [1:166] "X42" "X.198" "X.109" "X.75" ...
## $ n.obs : int 475
## $ scores : num [1:475, 1:166] -0.787 -0.255 -1.359 -1.464 -1.439 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:166] "Comp.1" "Comp.2" "Comp.3" "Comp.4" ...
## $ call : language princomp(x = features)
## - attr(*, "class")= chr "princomp"
eigenvalue<-get_eigenvalue(pca)
head(eigenvalue,10)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 51.747369 31.238880 31.23888
## Dim.2 23.049909 13.914782 45.15366
## Dim.3 12.628826 7.623777 52.77744
## Dim.4 8.400432 5.071178 57.84862
## Dim.5 8.164036 4.928469 62.77709
## Dim.6 6.755840 4.078369 66.85545
## Dim.7 5.364907 3.238690 70.09414
## Dim.8 5.040095 3.042607 73.13675
## Dim.9 3.302223 1.993488 75.13024
## Dim.10 2.849416 1.720137 76.85038
#a First 5 components explain more than 70 % of the variability and cumulative contribution gets smaller.
eig<-eigenvalue$variance.percent[eigenvalue$cumulative.variance.percent<85]
l<-length(eig)
ggplot()+
geom_col(aes(x=1:l,y=eig))+
geom_line(aes(x=1:l,y=eig,col="red"))
If we check plot first two components we see the two dimensions are not enough to distinguish bag labels:
plot(x=pca$scores[,1],y=pca$scores[,2],col=raw_data$X1+1)
abline(h=0,v=0,lty=3)
I calculate distance matrix and perform MDS on it, without forgetting to scale the data.
distanceMat<-dist(features,upper=T) %>% as.matrix()
mds<-cmdscale(distanceMat)
plot(mds[,1],mds[,2],
# main='Location',
xlab='', ylab='',
col=raw_data$X1+1)
#b Once again perform PCA this time on aggregated data,
data_agg<-aggregate(raw_data,by=list(raw_data$X1.1),FUN=mean)
features_agg<-scale(data_agg[4:169])
pca_agg<-prcomp(features_agg)
str(pca_agg)
## List of 5
## $ sdev : num [1:92] 6.65 5.62 4.69 4.21 3.1 ...
## $ rotation: num [1:166, 1:92] -0.0536 -0.0653 -0.0607 0.0848 -0.0135 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:166] "X42" "X.198" "X.109" "X.75" ...
## .. ..$ : chr [1:92] "PC1" "PC2" "PC3" "PC4" ...
## $ center : Named num [1:166] -1.35e-16 -6.12e-17 -8.78e-17 -2.73e-17 -4.36e-17 ...
## ..- attr(*, "names")= chr [1:166] "X42" "X.198" "X.109" "X.75" ...
## $ scale : Named num [1:166] 18.1 76.8 57.4 66.2 20.5 ...
## ..- attr(*, "names")= chr [1:166] "X42" "X.198" "X.109" "X.75" ...
## $ x : num [1:92, 1:92] -1.63 -3.51 -4.39 -3.38 -5.25 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:92] "PC1" "PC2" "PC3" "PC4" ...
## - attr(*, "class")= chr "prcomp"
eigenvalue_agg<-get_eigenvalue(pca_agg)
head(eigenvalue_agg,10)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 44.241806 26.651690 26.65169
## Dim.2 31.636561 19.058170 45.70986
## Dim.3 21.961633 13.229899 58.93976
## Dim.4 17.709779 10.668542 69.60830
## Dim.5 9.609901 5.789097 75.39740
## Dim.6 7.555042 4.551230 79.94863
## Dim.7 5.042804 3.037834 82.98646
## Dim.8 3.553406 2.140606 85.12707
## Dim.9 3.268410 1.968922 87.09599
## Dim.10 2.567582 1.546736 88.64273
plot(x=pca_agg$x[,1],y=pca_agg$x[,2],col=data_agg$X1+1)
Same MDS operation on aggregated data,
distanceMat_agg<-dist(features_agg,upper=T) %>% as.matrix()
mds_agg<-cmdscale(distanceMat_agg)
plot(mds_agg[,1],mds_agg[,2],
# main='Location',
xlab='', ylab='',
col=data_agg$X1+1)
#c
#Task 2
I read my image, and plot it.
img <- readJPEG("/Users/ayberkakgun/Desktop/profil2.jpg")
plot(c(0, 256), c(0, 256), type = "n", xlab = "", ylab = "")
rasterImage(img, 0, 0, 256, 256)
I split the channels and plot them seperately.
r<-img[1:256,1:256,1]
g<-img[1:256,1:256,2]
b<-img[1:256,1:256,3]
red_palette <- colorRampPalette(c("black","red"))
blue_palette <- colorRampPalette(c("black","blue"))
green_palette <- colorRampPalette(c("black","green"))
image(r,col=red_palette(256),main="red",axes = F)
image(g,col=green_palette(256),main="green",axes=F)
image(b,col=blue_palette(256),main="blue",axes=F)
I add noise to my image.
minx<-min(img)
maxx<-max(img)*0.1
noise<-matrix(runif(65536,min=minx,max=maxx),256)
r2<-r+noise
r2[r2>1]=1
g2<-g+noise
g2[g2>1]=1
b2<-b+noise
b2[b2>1]=1
# img2 <- rgb(r2, g2, b2)
img2<-img
img2[,,1]<-r2
img2[,,2]<-g2
img2[,,3]<-b2
plot(c(0, 256), c(0, 256), type = "n", xlab = "", ylab = "")
rasterImage(img2, 0, 0, 256, 256)
imgr <- load.image("/Users/ayberkakgun/Desktop/profil2.jpg")
imgrr<-grayscale(imgr)
vectors <-matrix(nrow=53824,ncol = 625)
k<-1
for (i in 1:232){
for(j in 1:232){
vectors[k,]<-imgrr[i:(i+24),j:(j+24)] %>% as.vector()
k<-k+1
}
}
pca_img<-princomp(vectors)
eigenvalue_img<-get_eigenvalue(pca_img)
head(eigenvalue_img,5)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 31.1588597 68.497106 68.49711
## Dim.2 4.3178490 9.492008 77.98911
## Dim.3 1.8995843 4.175892 82.16501
## Dim.4 0.9781981 2.150391 84.31540
## Dim.5 0.6798462 1.494519 85.80992
plot(eigenvalue_img$cumulative.variance.percent)
score1 <- pca_img$scores[,1]
score2 <- pca_img$scores[,2]
score3 <- pca_img$scores[,3]
score1_scaled <- (score1 - min(score1)) / (max(score1)-min(score1))
score2_scaled <- (score2 - min(score2)) / (max(score2)-min(score2))
score3_scaled<- (score3 - min(score3)) / (max(score3)-min(score3))
imgp1 <- matrix(score1_scaled, 232, byrow=TRUE)
imgp2 <- matrix(score2_scaled, 232, byrow=TRUE)
imgp3 <- matrix(score3_scaled, 232, byrow=TRUE)
image(imgp1,col=gray.colors(256),main="",axes = F)
image(imgp2,col=gray.colors(256),main="",axes = F)
image(imgp3,col=gray.colors(256),main="",axes = F)